library(rio)
library(stringr)
library(ggplot2)
library(dplyr)
library(knitr)
library(kableExtra)

# The workbook is looked up in the current working directory, which is also
# where the figure is written further down.
wd <- getwd()

data <- import(paste0(wd, "/tab1.xlsx"))

# Publication year = first run of digits found in the study label
# (e.g. "Deichert, 2019c" -> 2019); labels without digits become NA.
data[["year"]] <- as.numeric(
  str_extract(string = data$Study, pattern = "\\d+")
)

### draw figure 1

# Study labels that are excluded from the bar chart.
not_published <- c("Deichert, 2019c", "Scaduto & Negri, 2024c")

# One axis tick per year; defined once so that the filter, the breaks and the
# labels cannot drift apart.
year_breaks <- seq(2009, 2024, by = 1)

# Tick labels: the years as text, with the last one flagged as a partial year.
scale_x <- as.character(year_breaks)
scale_x[length(scale_x)] <- "2024\n(until March)"

# Rows shown in the figure. The explicit is.na() guard keeps the mask free of
# NA: indexing a data frame with an NA logical would insert all-NA rows, which
# ggplot then has to drop with a warning.
in_figure <- !is.na(data$year) &
  data$year >= min(year_breaks) &
  data$year <= max(year_breaks) &
  !(data$Study %in% not_published)

fig1 <- ggplot(data[in_figure, ], aes(x = year)) +
  geom_bar(fill = "red") +
  scale_y_continuous(breaks = seq(0, 12, by = 1)) +
  scale_x_continuous(breaks = year_breaks, labels = scale_x) +
  xlab("Year of publication") +
  ylab("Number of empirical studies published") +
  theme_minimal()

# Bare expression: the plot is still auto-printed wherever a top-level
# ggplot() call would have been (interactive session, Rscript, knitr).
fig1

# Pass the plot explicitly instead of relying on last_plot().
ggsave("barplot_studies.png", plot = fig1, path = wd, width = 8, height = 4)

## percentages related to table 1

# Sum the `Freq` values of a one-way frequency table (columns `Var1` and
# `Freq`, as returned by as.data.frame(table(...))) over the categories whose
# label matches the regular expression `pattern`. Returns 0 if none match.
sum_freq_matching <- function(freq_table, pattern) {
  sum(freq_table$Freq[grepl(pattern, freq_table$Var1)])
}

# Share (%) of each "Found evidence?" answer, rounded to one decimal before
# the categories are summed.
aus <- data.frame(
  round(prop.table(table(data$`Found evidence?`)) * 100, digits = 1)
)

perc_found <- sum_freq_matching(aus, "Yes")
perc_partial <- sum_freq_matching(aus, "Partial")

# Fraction (0-1) of all rows whose national context contains "US".
# NOTE(review): unlike tot_us below this is not multiplied by 100 and uses a
# substring match rather than equality -- confirm both are intended.
perc_US <- sum(grepl("US", data$`National context`)) / nrow(data)

# Target of the inference: share (%) of the two exact categories.
dfaus <- as.data.frame(prop.table(table(data$`Target of the inference`)) * 100)

tot_people <- dfaus$Freq[dfaus$Var1 == "People"]
tot_politicians <- dfaus$Freq[dfaus$Var1 == "Politicians"]

# Political trait: share (%) of categories mentioning each trait.
dfaus <- as.data.frame(prop.table(table(data$`Political trait`)) * 100)

tot_partisanship <- sum_freq_matching(dfaus, "Partisanship")
tot_ideology <- sum_freq_matching(dfaus, "Ideology")
tot_issue <- sum_freq_matching(dfaus, "Issue")

# Apolitical trait: share (%) of categories mentioning each trait family.
dfaus <- as.data.frame(prop.table(table(data$`Apolitical trait`)) * 100)

tot_cultural <- sum_freq_matching(dfaus, "Lifestyle")
tot_identity <- sum_freq_matching(dfaus, "Psych")
tot_group <- sum_freq_matching(dfaus, "Socdem")
tot_other <- sum_freq_matching(dfaus, "Other")

# Direction of the inference: share (%) per direction code.
dfaus <- as.data.frame(
  prop.table(table(data$`Direction of the inference`)) * 100
)

tot_p2a <- sum_freq_matching(dfaus, "PàA")
tot_a2p <- sum_freq_matching(dfaus, "AàP")

# Mentioned inferential strategy: share (%) per strategy. "roject" matches
# the label regardless of the case of its first letter.
dfaus <- as.data.frame(
  prop.table(table(data$`Mentioned inferential strategy`)) * 100
)

tot_proj <- sum_freq_matching(dfaus, "roject")
tot_ster <- sum_freq_matching(dfaus, "Ster")

# Share (%) of all rows whose national context is exactly "US". Counting with
# sum(..., na.rm = TRUE) keeps rows with a missing context out of the
# numerator; subsetting the data frame with an NA-containing mask would have
# counted each NA as a match.
tot_us <- sum(data$`National context` == "US", na.rm = TRUE) * 100 / nrow(data)
tot_out_us <- 100 - tot_us

# Share (%) of all rows whose empirical strategy contains "exper".
tot_surv <- sum(grepl("exper", data$`Empirical strategy`)) * 100 / nrow(data)

### table 2 percentages

# Cross-tabulation of inferential strategy (rows) by direction of the
# inference (columns). Each cell is the number of studies matching both labels
# divided by the total number of rows in `data`.
# NOTE(review): the results are fractions in [0, 1], not multiplied by 100 --
# confirm whether the consumer of these values rescales them.
strategy_col <- data$`Mentioned inferential strategy`
direction_col <- data$`Direction of the inference`

is_stereotyping <- grepl("Stereotyping", strategy_col)
is_projection <- grepl("rojection", strategy_col)
is_a2p <- grepl("AàP", direction_col)
is_p2a <- grepl("PàA", direction_col)

n_studies <- nrow(data)

tot_sec_quadr <- sum(is_stereotyping & is_a2p) / n_studies
tot_terz_quadr <- sum(is_stereotyping & is_p2a) / n_studies
tot_primo_quadr <- sum(is_projection & is_a2p) / n_studies
tot_quarto_quadr <- sum(is_projection & is_p2a) / n_studies

